bitkeeper revision 1.1159.212.10 (41dde8b1uPUodrtnTKejSUrLg-fcoQ)
authoriap10@labyrinth.cl.cam.ac.uk <iap10@labyrinth.cl.cam.ac.uk>
Fri, 7 Jan 2005 01:41:05 +0000 (01:41 +0000)
committeriap10@labyrinth.cl.cam.ac.uk <iap10@labyrinth.cl.cam.ac.uk>
Fri, 7 Jan 2005 01:41:05 +0000 (01:41 +0000)
xend extensions for VMX guests

12 files changed:
.rootkeys
tools/examples/Makefile
tools/examples/bochsrc [new file with mode: 0644]
tools/examples/mem-map.sxp [new file with mode: 0644]
tools/examples/xmexample.vmx [new file with mode: 0644]
tools/libxc/linux_boot_params.h [new file with mode: 0644]
tools/libxc/xc.h
tools/libxc/xc_vmx_build.c [new file with mode: 0644]
tools/python/xen/lowlevel/xc/xc.c
tools/python/xen/util/memmap.py [new file with mode: 0644]
tools/python/xen/xend/XendDomainInfo.py
tools/python/xen/xm/create.py

index 0b4b69a4d7c4730c3996b996469e4e75395f2581..ca97aacf1228fe950ab1b5045340c67adeb15b76 100644 (file)
--- a/.rootkeys
+++ b/.rootkeys
 401d7e16UgeqroJQTIhwkrDVkoWgZQ tools/examples/README
 41597996VhTbNuHbuscYSfRb-WR6fA tools/examples/block-enbd
 41597996GHP2_yVih2UspXh328fgMQ tools/examples/block-file
+41dde8af16Hulg1pgW8aOnbbxyrl7w tools/examples/bochsrc
 405ff55dawQyCHFEnJ067ChPRoXBBA tools/examples/init.d/xend
 40278d94cIUWl2eRgnwZtr4hTyWT1Q tools/examples/init.d/xendomains
+41dde8afTUuvdtFUlOx0ZRusKxyd8w tools/examples/mem-map.sxp
 40ee75a9xFz6S05sDKu-JCLqyVTkDA tools/examples/network
 40ee75a967sxgcRY4Q7zXoVUaJ4flA tools/examples/vif-bridge
 40ee75a93cqxHp6MiYXxxwR5j2_8QQ tools/examples/xend-config.sxp
+41dde8af6M2Pm1Rrv_f5jEFC_BIOIA tools/examples/xmexample.vmx
 41090ec8Pj_bkgCBpg2W7WfmNkumEA tools/examples/xmexample1
 40cf2937oKlROYOJTN8GWwWM5AmjBg tools/examples/xmexample2
 3fbba6dbDfYvJSsw9500b4SZyUhxjQ tools/libxc/Makefile
+41dde8afKYRKxS4XtLv1KUegGQy_bg tools/libxc/linux_boot_params.h
 41cc934abX-QLXJXW_clV_wRjM0zYg tools/libxc/plan9a.out.h
 3fbba6dc1uU7U3IFeF6A-XEOYF2MkQ tools/libxc/rpm.spec
 3fbba6dcrNxtygEcgJYAJJ1gCQqfsA tools/libxc/xc.h
 3fbba6dctWRWlFJkYb6hdix2X4WMuw tools/libxc/xc_private.c
 3fbba6dcbVrG2hPzEzwdeV_UC8kydQ tools/libxc/xc_private.h
 40589968UQFnJeOMn8UIFLbXBuwXjw tools/libxc/xc_rrobin.c
+41dde8b0pLfAKMs_L9Uri2hnzHiCRQ tools/libxc/xc_vmx_build.c
 40e1b09dMYB4ItGCqcMIzirdMd9I-w tools/libxutil/Makefile
 40e033325Sjqs-_4TuzeUEprP_gYFg tools/libxutil/allocate.c
 40e03332KYz7o1bn2MG_KPbBlyoIMA tools/libxutil/allocate.h
 40dfd40aGqGkiopOOgJxSF4iCbHM0Q tools/python/xen/util/__init__.py
 4055ee4dwy4l0MghZosxoiu6zmhc9Q tools/python/xen/util/console_client.py
 40c9c468IienauFHQ_xJIcqnPJ8giQ tools/python/xen/util/ip.py
+41dde8b0yuJX-S79w4xJKxBQ-Mhp1A tools/python/xen/util/memmap.py
 4059c6a0pnxhG8hwSOivXybbGOwuXw tools/python/xen/util/tempfile.py
 40c9c468SNuObE_YWARyS0hzTPSzKg tools/python/xen/xend/Args.py
 41597996WNvJA-DVCBmc0xU9w_XmoA tools/python/xen/xend/Blkctl.py
index 1e77f89d5023c2ea3f41c1631b66e7262f30e43d..f0f8250ea00936f99229485356da81505e089b1d 100644 (file)
@@ -8,6 +8,9 @@ XEN_CONFIG_DIR = /etc/xen
 XEN_CONFIGS = xend-config.sxp
 XEN_CONFIGS += xmexample1 
 XEN_CONFIGS += xmexample2
+XEN_CONFIGS += xmexample.vmx
+XEN_CONFIGS += mem-map.sxp
+XEN_CONFIGS += bochsrc
 
 # Xen script dir and scripts to go there.
 XEN_SCRIPT_DIR = /etc/xen/scripts
diff --git a/tools/examples/bochsrc b/tools/examples/bochsrc
new file mode 100644 (file)
index 0000000..907c78b
--- /dev/null
@@ -0,0 +1,19 @@
+#megs: 32
+#romimage: file=$BXSHARE/BIOS-bochs-latest, address=0xf0000
+#vgaromimage: $BXSHARE/VGABIOS-lgpl-latest
+floppya: 1_44=a.img, status=inserted
+floppyb: 1_44=b.img, status=inserted
+#ata0-master: type=disk, path=minibootable.img, cylinders=900, heads=15, spt=17
+# if you don't use absolute paths below, bochs looks under the cwd of xend, 
+# which is usually "/"
+ata0-master: type=disk, path=/tmp/min-fc2-i386.img, cylinders=800, heads=4, spt=32
+boot: c
+
+log: /tmp/bochsout.txt
+#debug: action=report
+info: action=report
+error: action=report
+panic: action=ask
+
+mouse: enabled=0
+ips: 1500000
diff --git a/tools/examples/mem-map.sxp b/tools/examples/mem-map.sxp
new file mode 100644 (file)
index 0000000..246b49b
--- /dev/null
@@ -0,0 +1,10 @@
+(memmap
+ (0000000000000000  000000000009f800 "AddressRangeMemory"   WB)
+ (000000000009f800  00000000000a0000 "AddressRangeReserved" UC)
+ (00000000000a0000  00000000000bffff "AddressRangeIO"       UC)
+ (00000000000f0000  0000000000100000 "AddressRangeReserved" UC)
+ (0000000000100000  0000000008000000 "AddressRangeMemory"   WB)
+ (0000000007fff000  0000000008000000 "AddressRangeShared"   WB)
+ (0000000008000000  0000000008003000 "AddressRangeNVS"      UC)
+ (0000000008003000  000000000800d000 "AddressRangeACPI"     WB)
+ (00000000fec00000  0000000100000000 "AddressRangeIO"       UC))
diff --git a/tools/examples/xmexample.vmx b/tools/examples/xmexample.vmx
new file mode 100644 (file)
index 0000000..6e90395
--- /dev/null
@@ -0,0 +1,93 @@
+#  -*- mode: python; -*-
+#============================================================================
+# Python configuration setup for 'xm create'.
+# This script sets the parameters used when a domain is created using 'xm create'.
+# You use a separate script for each domain you want to create, or 
+# you can set the parameters for the domain on the xm command line.
+#============================================================================
+
+#----------------------------------------------------------------------------
+# Kernel image file.
+kernel = "/boot/vmlinuz-rhel3-static"
+
+# Optional ramdisk.
+#ramdisk = "/boot/initrd.gz"
+
+# The domain build function. Default is 'linux'.
+builder='vmx'
+#builder='linux'
+#builder='netbsd'
+
+# Initial memory allocation (in megabytes) for the new domain.
+memory = 128
+
+# A name for your domain. All domains must have different names.
+name = "ExampleVMXDomain"
+
+# Which CPU to start domain on? 
+#cpu = -1   # leave to Xen to pick
+
+#----------------------------------------------------------------------------
+# Define network interfaces.
+
+# Number of network interfaces. Default is 1.
+#nics=1
+nics=0
+
+# Optionally define mac and/or bridge for the network interfaces.
+# Random MACs are assigned if not given.
+#vif = [ 'mac=aa:00:00:00:00:11, bridge=xen-br0' ]
+
+#----------------------------------------------------------------------------
+# Define the disk devices you want the domain to have access to, and
+# what you want them accessible as.
+# Each disk entry is of the form phy:UNAME,DEV,MODE
+# where UNAME is the device, DEV is the device name the domain will see,
+# and MODE is r for read-only, w for read-write.
+
+#disk = [ 'phy:hda1,hda1,r' ]
+
+#----------------------------------------------------------------------------
+# Set the kernel command line for the new domain.
+# You only need to define the IP parameters and hostname if the domain's
+# IP config doesn't, e.g. in ifcfg-eth0 or via DHCP.
+# You can use 'extra' to set the runlevel and custom environment
+# variables used by custom rc scripts (e.g. VMID=, usr= ).
+
+# Set if you want dhcp to allocate the IP address.
+#dhcp="dhcp"
+# Set netmask.
+#netmask=
+# Set default gateway.
+#gateway=
+# Set the hostname.
+#hostname= "vm%d" % vmid
+
+# Set root device.
+#root = "/dev/ram0"
+root = "/dev/hda1 ro"
+
+# Root device for nfs.
+#root = "/dev/nfs"
+# The nfs server.
+#nfs_server = '169.254.1.0'  
+# Root directory on the nfs server.
+#nfs_root   = '/full/path/to/root/directory'
+
+# Sets runlevel 1.
+extra = "1"
+
+#----------------------------------------------------------------------------
+# Set according to whether you want the domain restarted when it exits.
+# The default is 'onreboot', which restarts the domain when it shuts down
+# with exit code reboot.
+# Other values are 'always', and 'never'.
+
+#restart = 'onreboot'
+
+#============================================================================
+
+# New stuff
+memmap = '/etc/xen/mem-map.sxp'
+device_model = '/usr/sbin/device-model'
+device_config = '/etc/xen/bochsrc'
diff --git a/tools/libxc/linux_boot_params.h b/tools/libxc/linux_boot_params.h
new file mode 100644 (file)
index 0000000..749a41f
--- /dev/null
@@ -0,0 +1,163 @@
+#ifndef __LINUX_BOOT_PARAMS_H__
+#define __LINUX_BOOT_PARAMS_H__
+
+#include <asm/types.h>
+
+#define E820MAX        32
+
+struct mem_map {
+    int nr_map;
+    struct entry {
+        unsigned long long addr;       /* start of memory segment */
+        unsigned long long size;       /* size of memory segment */
+        unsigned long type;            /* type of memory segment */
+#define E820_RAM        1
+#define E820_RESERVED   2
+#define E820_ACPI       3 /* usable as RAM once ACPI tables have been read */
+#define E820_NVS        4
+
+        unsigned long caching_attr;    /* used by hypervisor */
+#define MEMMAP_UC      0
+#define MEMMAP_WC      1
+#define MEMMAP_WT      4
+#define MEMMAP_WP      5
+#define MEMMAP_WB      6
+
+    }map[E820MAX];
+};
+
+struct e820entry { /* element type of linux_boot_params.e820_map[] and e820map.map[] */
+       unsigned long long addr;        /* start of memory segment */
+       unsigned long long size;        /* size of memory segment */
+       unsigned long type;             /* type of memory segment (E820_* values); NOTE(review): linux_boot_params puts E820MAX entries between 0x2d0 and 0x550, i.e. 20 bytes each, which assumes a 4-byte unsigned long -- confirm for 64-bit builds */
+};
+
+struct e820map {
+    int nr_map;
+    struct e820entry map[E820MAX];
+};
+
+struct drive_info_struct { __u8 dummy[32]; }; 
+
+struct sys_desc_table { 
+    __u16 length; 
+    __u8 table[318]; 
+}; 
+
+struct screen_info {
+    unsigned char  orig_x;             /* 0x00 */
+    unsigned char  orig_y;             /* 0x01 */
+    unsigned short dontuse1;           /* 0x02 -- EXT_MEM_K sits here */
+    unsigned short orig_video_page;    /* 0x04 */
+    unsigned char  orig_video_mode;    /* 0x06 */
+    unsigned char  orig_video_cols;    /* 0x07 */
+    unsigned short unused2;            /* 0x08 */
+    unsigned short orig_video_ega_bx;  /* 0x0a */
+    unsigned short unused3;            /* 0x0c */
+    unsigned char  orig_video_lines;   /* 0x0e */
+    unsigned char  orig_video_isVGA;   /* 0x0f */
+    unsigned short orig_video_points;  /* 0x10 */
+    
+    /* VESA graphic mode -- linear frame buffer */
+    unsigned short lfb_width;          /* 0x12 */
+    unsigned short lfb_height;         /* 0x14 */
+    unsigned short lfb_depth;          /* 0x16 */
+    unsigned long  lfb_base;           /* 0x18 */
+    unsigned long  lfb_size;           /* 0x1c */
+    unsigned short dontuse2, dontuse3; /* 0x20 -- CL_MAGIC and CL_OFFSET here */
+    unsigned short lfb_linelength;     /* 0x24 */
+    unsigned char  red_size;           /* 0x26 */
+    unsigned char  red_pos;            /* 0x27 */
+    unsigned char  green_size;         /* 0x28 */
+    unsigned char  green_pos;          /* 0x29 */
+    unsigned char  blue_size;          /* 0x2a */
+    unsigned char  blue_pos;           /* 0x2b */
+    unsigned char  rsvd_size;          /* 0x2c */
+    unsigned char  rsvd_pos;           /* 0x2d */
+    unsigned short vesapm_seg;         /* 0x2e */
+    unsigned short vesapm_off;         /* 0x30 */
+    unsigned short pages;              /* 0x32 */
+                                       /* 0x34 -- 0x3f reserved for future expansion */
+};
+
+struct screen_info_overlap { 
+    __u8 reserved1[2]; /* 0x00 */ 
+    __u16 ext_mem_k; /* 0x02 */ 
+    __u8 reserved2[0x20 - 0x04]; /* 0x04 */ 
+    __u16 cl_magic; /* 0x20 */ 
+#define CL_MAGIC_VALUE 0xA33F 
+    __u16 cl_offset; /* 0x22 */ 
+    __u8 reserved3[0x40 - 0x24]; /* 0x24 */ 
+}; 
+
+
+struct apm_bios_info {
+    __u16 version;
+    __u16  cseg;
+    __u32   offset;
+    __u16  cseg_16;
+    __u16  dseg;
+    __u16  flags;
+    __u16  cseg_len;
+    __u16  cseg_16_len;
+    __u16  dseg_len;
+};
+struct linux_boot_params { 
+    union { /* 0x00 */ 
+       struct screen_info info; 
+       struct screen_info_overlap overlap; 
+    } screen; 
+    struct apm_bios_info apm_bios_info; /* 0x40 */ 
+    __u8 reserved4[0x80 - 0x54]; /* 0x54 */ 
+    struct drive_info_struct drive_info; /* 0x80 */ 
+    struct sys_desc_table sys_desc_table; /* 0xa0 */ 
+    __u32 alt_mem_k; /* 0x1e0 */ 
+    __u8 reserved5[4]; /* 0x1e4 */ 
+    __u8 e820_map_nr; /* 0x1e8 */ 
+    __u8 reserved6[8]; /* 0x1e9 */ 
+    __u8 setup_sects; /* 0x1f1 */ 
+    __u16 mount_root_rdonly; /* 0x1f2 */ 
+    __u16 syssize; /* 0x1f4 */ 
+    __u16 swapdev; /* 0x1f6 */ 
+    __u16 ramdisk_flags; /* 0x1f8 */ 
+#define RAMDISK_IMAGE_START_MASK 0x07FF 
+#define RAMDISK_PROMPT_FLAG 0x8000 
+#define RAMDISK_LOAD_FLAG 0x4000 
+    __u16 vid_mode; /* 0x1fa */ 
+    __u16 root_dev; /* 0x1fc */ 
+    __u8 reserved9[1]; /* 0x1fe */ 
+    __u8 aux_device_info; /* 0x1ff */ 
+    /* 2.00+ */ 
+    __u8 reserved10[2]; /* 0x200 */ 
+    __u8 header_magic[4]; /* 0x202 */ 
+    __u16 protocol_version; /* 0x206 */ 
+    __u8 reserved11[8]; /* 0x208 */ 
+    __u8 loader_type; /* 0x210 */ 
+#define LOADER_TYPE_LOADLIN 1 
+#define LOADER_TYPE_BOOTSECT_LOADER 2 
+#define LOADER_TYPE_SYSLINUX 3 
+#define LOADER_TYPE_ETHERBOOT 4 
+#define LOADER_TYPE_UNKNOWN 0xFF 
+    __u8 loader_flags; /* 0x211 */ 
+    __u8 reserved12[2]; /* 0x212 */ 
+    __u32 code32_start; /* 0x214 */ 
+    __u32 initrd_start; /* 0x218 */ 
+    __u32 initrd_size; /* 0x21c */ 
+    __u8 reserved13[4]; /* 0x220 */ 
+    /* 2.01+ */ 
+    __u16 heap_end_ptr; /* 0x224 */ 
+    __u8 reserved14[2]; /* 0x226 */ 
+    /* 2.02+ */ 
+    __u32 cmd_line_ptr; /* 0x228 */ 
+    /* 2.03+ */ 
+    __u32 ramdisk_max; /* 0x22c */ 
+    __u8 reserved15[0x2d0 - 0x230]; /* 0x230 */ 
+    struct e820entry e820_map[E820MAX]; /* 0x2d0 */ 
+    __u64 shared_info; /* 0x550 */
+    __u8 padding[0x800 - 0x558]; /* 0x558 */ 
+    __u8 cmd_line[0x800]; /* 0x800 */
+} __attribute__((packed)); 
+
+#endif /* __LINUX_BOOT_PARAMS_H__ */
index 72dfd3c4e63858142abcdba2d0f611ccd4689981..8114faafb2e654f83010d99f31d0db96da72a9ad 100644 (file)
@@ -10,6 +10,8 @@
 #define __XC_H__
 
 #include <stdint.h>
+#include "linux_boot_params.h"
+
 typedef uint8_t            u8;
 typedef uint16_t           u16;
 typedef uint32_t           u32;
@@ -105,6 +107,15 @@ xc_plan9_build (int xc_handle,
                unsigned int control_evtchn, 
                unsigned long flags);
 
+int xc_vmx_build(int xc_handle,
+                 u32 domid,
+                 const char *image_name,
+                 struct mem_map *memmap,
+                 const char *ramdisk_name,
+                 const char *cmdline,
+                 unsigned int control_evtchn,
+                 unsigned long flags);
+
 int xc_bvtsched_global_set(int xc_handle,
                            unsigned long ctx_allow);
 
@@ -208,4 +219,7 @@ void *xc_map_foreign_range(int xc_handle, u32 dom,
 void *xc_map_foreign_batch(int xc_handle, u32 dom, int prot,
                            unsigned long *arr, int num );
 
+int xc_get_pfn_list(int xc_handle, u32 domid, unsigned long *pfn_buf, 
+                   unsigned long max_pfns);
+
 #endif /* __XC_H__ */
diff --git a/tools/libxc/xc_vmx_build.c b/tools/libxc/xc_vmx_build.c
new file mode 100644 (file)
index 0000000..1354170
--- /dev/null
@@ -0,0 +1,865 @@
+/******************************************************************************
+ * xc_vmx_build.c
+ */
+
+#include "xc_private.h"
+#define ELFSIZE 32
+#include "xc_elf.h"
+#include <stdlib.h>
+#include <zlib.h>
+#include "linux_boot_params.h"
+
+#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED)
+#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
+
+#define round_pgup(_p)    (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
+#define round_pgdown(_p)  ((_p)&PAGE_MASK)
+
+#define LINUX_BOOT_PARAMS_ADDR   0x00090000
+#define LINUX_KERNEL_ENTR_ADDR   0x00100000
+#define LINUX_PAGE_OFFSET        0xC0000000
+
+struct domain_setup_info
+{
+    unsigned long v_start;
+    unsigned long v_end;
+    unsigned long v_kernstart;
+    unsigned long v_kernend;
+    unsigned long v_kernentry;
+
+    unsigned int use_writable_pagetables;
+    unsigned int load_bsd_symtab;
+
+    unsigned long symtab_addr;
+    unsigned long symtab_len;
+};
+
+static int
+parseelfimage(
+    char *elfbase, unsigned long elfsize, struct domain_setup_info *dsi);
+static int
+loadelfimage(
+    char *elfbase, int xch, u32 dom, unsigned long *parray,
+    unsigned long vstart);
+static int
+loadelfsymtab(
+    char *elfbase, int xch, u32 dom, unsigned long *parray,
+    struct domain_setup_info *dsi);
+
+/* Issue DOM0_GETDOMAININFO for domid; yields its tot_pages, or -1 if the op fails. */
+static long get_tot_pages(int xc_handle, u32 domid)
+{
+    dom0_op_t info_op;
+    info_op.cmd = DOM0_GETDOMAININFO;
+    info_op.u.getdomaininfo.domain = (domid_t)domid;
+    info_op.u.getdomaininfo.ctxt = NULL; /* no context buffer is supplied */
+    return (do_dom0_op(xc_handle, &info_op) >= 0) ? info_op.u.getdomaininfo.tot_pages : -1;
+}
+
+/* Fill pfn_buf with up to max_pfns frame numbers of domid via DOM0_GETMEMLIST.
+ * Returns the num_pfns reported by the op, or -1 if locking or the op fails. */
+int xc_get_pfn_list(int xc_handle,
+                u32 domid, 
+                unsigned long *pfn_buf, 
+                unsigned long max_pfns)
+{
+    const size_t buf_bytes = max_pfns * sizeof(unsigned long);
+    dom0_op_t list_op;
+    int rc;
+
+    list_op.cmd = DOM0_GETMEMLIST;
+    list_op.u.getmemlist.domain   = (domid_t)domid;
+    list_op.u.getmemlist.max_pfns = max_pfns;
+    list_op.u.getmemlist.buffer   = pfn_buf;
+    /* The buffer stays mlock'd for the duration of the dom0 op. */
+    if ( mlock(pfn_buf, buf_bytes) != 0 )
+        return -1;
+    rc = do_dom0_op(xc_handle, &list_op);
+    (void)munlock(pfn_buf, buf_bytes);
+    return (rc >= 0) ? list_op.u.getmemlist.num_pfns : -1;
+}
+
+/* Copy one page from src_page into guest frame dst_pfn; 0 on success, -1 if mapping fails. */
+static int copy_to_domain_page(int xc_handle, u32 domid,
+                               unsigned long dst_pfn, void *src_page)
+{
+    void *mapping = xc_map_foreign_range(xc_handle, domid, PAGE_SIZE, PROT_WRITE, dst_pfn);
+
+    if ( mapping != NULL )
+    {
+        memcpy(mapping, src_page, PAGE_SIZE);
+        munmap(mapping, PAGE_SIZE);
+    }
+    return (mapping != NULL) ? 0 : -1;
+}
+
+static int setup_guestos(int xc_handle, /* Builds the guest's initial memory image and register state; returns 0, or -1 on error. */
+                         u32 dom,
+                         char *image, unsigned long image_size,
+                         gzFile initrd_gfd, unsigned long initrd_len,
+                         unsigned long nr_pages,
+                         full_execution_context_t *ctxt,
+                         const char *cmdline,
+                         unsigned long shared_info_frame,
+                         unsigned int control_evtchn, /* not referenced in this function */
+                         unsigned long flags, /* not referenced in this function */
+                         struct mem_map * mem_mapp) /* entries are copied into boot_params e820_map below */
+{
+    l1_pgentry_t *vl1tab=NULL, *vl1e=NULL;
+    l2_pgentry_t *vl2tab=NULL, *vl2e=NULL;
+    unsigned long *page_array = NULL;
+    unsigned long l2tab;
+    unsigned long l1tab;
+    unsigned long count, i;
+    shared_info_t *shared_info;
+    struct linux_boot_params * boot_paramsp;
+    __u16 * boot_gdtp;
+    mmu_t *mmu = NULL;
+    int rc;
+
+    unsigned long nr_pt_pages;
+    unsigned long ppt_alloc;
+
+    struct domain_setup_info dsi;
+    unsigned long vinitrd_start;
+    unsigned long vinitrd_end;
+    unsigned long vboot_params_start;
+    unsigned long vboot_params_end;
+    unsigned long vboot_gdt_start;
+    unsigned long vboot_gdt_end;
+    unsigned long vpt_start;
+    unsigned long vpt_end;
+    unsigned long v_end;
+
+    memset(&dsi, 0, sizeof(struct domain_setup_info));
+
+    rc = parseelfimage(image, image_size, &dsi);
+    if ( rc != 0 )
+        goto error_out;
+
+    if (dsi.use_writable_pagetables)
+        xc_domain_setvmassist(xc_handle, dom, VMASST_CMD_enable,
+                              VMASST_TYPE_writable_pagetables);
+
+    if (dsi.load_bsd_symtab)
+        loadelfsymtab(image, xc_handle, dom, NULL, &dsi); /* first call passes NULL for parray; repeated below with page_array */
+
+    if ( (dsi.v_start & (PAGE_SIZE-1)) != 0 )
+    {
+        PERROR("Guest OS must load to a page boundary.\n");
+        goto error_out;
+    }
+
+    /*
+     * Why do we need this? The number of page-table frames depends on the 
+     * size of the bootstrap address space. But the size of the address space 
+     * depends on the number of page-table frames (since each one is mapped 
+     * read-only). We have a pair of simultaneous equations in two unknowns, 
+     * which we solve by exhaustive search.
+     */
+    nr_pt_pages = 1 + (nr_pages >> (PAGE_SHIFT - 2)); /* NOTE(review): computed directly from nr_pages; the search described above does not appear in this function */
+    vboot_params_start = LINUX_BOOT_PARAMS_ADDR;
+    vboot_params_end   = vboot_params_start + PAGE_SIZE;
+    vboot_gdt_start    = vboot_params_end;
+    vboot_gdt_end      = vboot_gdt_start + PAGE_SIZE;
+    v_end              = nr_pages << PAGE_SHIFT;
+    vpt_end            = v_end - (16 << PAGE_SHIFT); /* leaving the top 64k untouched */
+    vpt_start          = vpt_end - (nr_pt_pages << PAGE_SHIFT);
+    vinitrd_end        = vpt_start; /* initrd sits directly below the page tables */
+    vinitrd_start      = vinitrd_end - initrd_len;
+    vinitrd_start      = vinitrd_start & (~(PAGE_SIZE - 1)); /* round start down to a page boundary */
+
+    if(initrd_len == 0)
+        vinitrd_start = vinitrd_end = 0;
+
+    printf("VIRTUAL MEMORY ARRANGEMENT:\n"
+           " Boot_params:   %08lx->%08lx\n"
+           " boot_gdt:      %08lx->%08lx\n"
+           " Loaded kernel: %08lx->%08lx\n"
+           " Init. ramdisk: %08lx->%08lx\n"
+           " Page tables:   %08lx->%08lx\n"
+           " TOTAL:         %08lx->%08lx\n",
+           vboot_params_start, vboot_params_end,
+           vboot_gdt_start, vboot_gdt_end,
+           dsi.v_kernstart, dsi.v_kernend, 
+           vinitrd_start, vinitrd_end,
+           vpt_start, vpt_end,
+           dsi.v_start, v_end);
+    printf(" ENTRY ADDRESS: %08lx\n", dsi.v_kernentry);
+    printf(" INITRD LENGTH: %08lx\n", initrd_len);
+
+    if ( (v_end - dsi.v_start) > (nr_pages * PAGE_SIZE) )
+    {
+        printf("Initial guest OS requires too much space\n"
+               "(%luMB is greater than %luMB limit)\n",
+               (v_end-dsi.v_start)>>20, (nr_pages<<PAGE_SHIFT)>>20);
+        goto error_out;
+    }
+
+    if ( (page_array = malloc(nr_pages * sizeof(unsigned long))) == NULL )
+    {
+        PERROR("Could not allocate memory");
+        goto error_out;
+    }
+
+    if ( xc_get_pfn_list(xc_handle, dom, page_array, nr_pages) != nr_pages )
+    {
+        PERROR("Could not get the page frame list");
+        goto error_out;
+    }
+
+    loadelfimage(image, xc_handle, dom, page_array, dsi.v_start); /* NOTE(review): return value is ignored */
+
+    if (dsi.load_bsd_symtab)
+        loadelfsymtab(image, xc_handle, dom, page_array, &dsi);
+
+    /* Load the initial ramdisk image. */
+    if ( initrd_len != 0 )
+    {
+        for ( i = (vinitrd_start - dsi.v_start); 
+              i < (vinitrd_end - dsi.v_start); i += PAGE_SIZE )
+        {
+            char page[PAGE_SIZE];
+            if ( gzread(initrd_gfd, page, PAGE_SIZE) == -1 )
+            {
+                PERROR("Error reading initrd image, could not");
+                goto error_out;
+            }
+            copy_to_domain_page(xc_handle, dom,
+                                page_array[i>>PAGE_SHIFT], page); /* NOTE(review): result is ignored */
+        }
+    }
+
+    if ( (mmu = init_mmu_updates(xc_handle, dom)) == NULL )
+        goto error_out;
+
+    /* First allocate page for page dir. */
+    ppt_alloc = (vpt_start - dsi.v_start) >> PAGE_SHIFT; /* page_array index of the first page-table frame */
+    l2tab = page_array[ppt_alloc++] << PAGE_SHIFT;
+    ctxt->pt_base = l2tab;
+
+    /* Initialise the page tables. */
+    if ( (vl2tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, 
+                                        PROT_READ|PROT_WRITE, 
+                                        l2tab >> PAGE_SHIFT)) == NULL )
+        goto error_out;
+    memset(vl2tab, 0, PAGE_SIZE);
+    vl2e = &vl2tab[l2_table_offset(dsi.v_start)];
+    for ( count = 0; count < ((v_end-dsi.v_start)>>PAGE_SHIFT); count++ )
+    {    
+        if ( ((unsigned long)vl1e & (PAGE_SIZE-1)) == 0 ) /* also true on the first pass, since vl1e starts out NULL */
+        {
+            l1tab = page_array[ppt_alloc++] << PAGE_SHIFT;
+            if ( vl1tab != NULL )
+                munmap(vl1tab, PAGE_SIZE);
+            if ( (vl1tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
+                                                PROT_READ|PROT_WRITE,
+                                                l1tab >> PAGE_SHIFT)) == NULL )
+            {
+                munmap(vl2tab, PAGE_SIZE);
+                goto error_out;
+            }
+            memset(vl1tab, 0, PAGE_SIZE);
+            vl1e = &vl1tab[l1_table_offset(dsi.v_start + (count<<PAGE_SHIFT))];
+            *vl2e++ = l1tab | L2_PROT;
+        }
+
+        *vl1e = (page_array[count] << PAGE_SHIFT) | L1_PROT;
+        if ( (count >= ((vpt_start-dsi.v_start)>>PAGE_SHIFT)) && 
+             (count <  ((vpt_end  -dsi.v_start)>>PAGE_SHIFT)) )
+            *vl1e &= ~_PAGE_RW; /* pages in [vpt_start, vpt_end) are mapped without _PAGE_RW */
+        vl1e++;
+    }
+    munmap(vl1tab, PAGE_SIZE);
+    munmap(vl2tab, PAGE_SIZE);
+
+    /*
+     * Pin down l2tab addr as page dir page - causes hypervisor to provide
+     * correct protection for the page
+     */ 
+    if ( add_mmu_update(xc_handle, mmu,
+                        l2tab | MMU_EXTENDED_COMMAND, MMUEXT_PIN_L2_TABLE) )
+        goto error_out;
+
+    boot_paramsp = xc_map_foreign_range(
+        xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
+        page_array[(vboot_params_start-dsi.v_start)>>PAGE_SHIFT]);
+    memset(boot_paramsp, 0, sizeof(*boot_paramsp)); /* NOTE(review): the mapping is not checked for NULL before use */
+
+    strncpy(boot_paramsp->cmd_line, cmdline, 0x800);
+    boot_paramsp->cmd_line[0x800-1] = '\0';
+    boot_paramsp->cmd_line_ptr = ((unsigned long) vboot_params_start) + offsetof(struct linux_boot_params, cmd_line);
+
+    boot_paramsp->setup_sects = 0;
+    boot_paramsp->mount_root_rdonly = 1;
+    boot_paramsp->swapdev = 0x0; 
+    boot_paramsp->ramdisk_flags = 0x0; 
+    boot_paramsp->root_dev = 0x0; /* We must tell kernel root dev by kernel command line. */
+
+    /* we don't have a ps/2 mouse now.
+     * 0xAA means an aux mouse is there.
+     * See detect_auxiliary_port() in pc_keyb.c.
+     */
+    boot_paramsp->aux_device_info = 0x0; 
+
+    boot_paramsp->header_magic[0] = 0x48; /* "H" */
+    boot_paramsp->header_magic[1] = 0x64; /* "d" */
+    boot_paramsp->header_magic[2] = 0x72; /* "r" */
+    boot_paramsp->header_magic[3] = 0x53; /* "S" */
+
+    boot_paramsp->protocol_version = 0x0203; /* 2.03 */
+    boot_paramsp->loader_type = 0x71; /* GRUB */
+    boot_paramsp->loader_flags = 0x1; /* loaded high */
+    boot_paramsp->code32_start = LINUX_KERNEL_ENTR_ADDR; /* 1MB */
+    boot_paramsp->initrd_start = vinitrd_start;
+    boot_paramsp->initrd_size = initrd_len;
+
+    i = (nr_pages >> (PAGE_SHIFT - 10)) - (1 << 10) - 4;
+    boot_paramsp->alt_mem_k = i; /* alt_mem_k */
+    boot_paramsp->screen.overlap.ext_mem_k = i & 0xFFFF; /* ext_mem_k */
+
+    /*
+     * Stuff SCREEN_INFO
+     */
+    boot_paramsp->screen.info.orig_x = 0;
+    boot_paramsp->screen.info.orig_y = 0;
+    boot_paramsp->screen.info.orig_video_page = 8;
+    boot_paramsp->screen.info.orig_video_mode = 3;
+    boot_paramsp->screen.info.orig_video_cols = 80;
+    boot_paramsp->screen.info.orig_video_ega_bx = 0;
+    boot_paramsp->screen.info.orig_video_lines = 25;
+    boot_paramsp->screen.info.orig_video_isVGA = 1;
+    boot_paramsp->screen.info.orig_video_points = 0x0010;
+
+    /* seems we may NOT stuff boot_paramsp->apm_bios_info */
+    /* seems we may NOT stuff boot_paramsp->drive_info */
+    /* seems we may NOT stuff boot_paramsp->sys_desc_table */
+    *((unsigned short *) &boot_paramsp->drive_info.dummy[0]) = 800; /* NOTE(review): drive_info IS written here; 800/4/32 presumably a disk geometry (cylinders/heads/sectors) -- confirm */
+    boot_paramsp->drive_info.dummy[2] = 4;
+    boot_paramsp->drive_info.dummy[14] = 32;
+
+    boot_paramsp->e820_map_nr = mem_mapp->nr_map;
+    for (i=0; i<mem_mapp->nr_map; i++) {
+        boot_paramsp->e820_map[i].addr = mem_mapp->map[i].addr; 
+        boot_paramsp->e820_map[i].size = mem_mapp->map[i].size; 
+        boot_paramsp->e820_map[i].type = mem_mapp->map[i].type; 
+    }
+    munmap(boot_paramsp, PAGE_SIZE); 
+
+    boot_gdtp = xc_map_foreign_range(
+        xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
+        page_array[(vboot_gdt_start-dsi.v_start)>>PAGE_SHIFT]);
+    memset(boot_gdtp, 0, PAGE_SIZE); /* NOTE(review): the mapping is not checked for NULL before use */
+    boot_gdtp[12*4 + 0] = boot_gdtp[13*4 + 0] = 0xffff; /* limit */
+    boot_gdtp[12*4 + 1] = boot_gdtp[13*4 + 1] = 0x0000; /* base */
+    boot_gdtp[12*4 + 2] = 0x9a00; boot_gdtp[13*4 + 2] = 0x9200; /* perms */
+    boot_gdtp[12*4 + 3] = boot_gdtp[13*4 + 3] = 0x00cf; /* granu + top of limit */
+    munmap(boot_gdtp, PAGE_SIZE); /* the two entries are at byte offsets 0x60 and 0x68, the values loaded into cs and ds/ss below */
+
+    /* shared_info page starts its life empty. */
+    shared_info = xc_map_foreign_range(
+        xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE, shared_info_frame);
+    memset(shared_info, 0, sizeof(shared_info_t)); /* NOTE(review): the mapping is not checked for NULL before use */
+    /* Mask all upcalls... */
+    for ( i = 0; i < MAX_VIRT_CPUS; i++ )
+        shared_info->vcpu_data[i].evtchn_upcall_mask = 1;
+    munmap(shared_info, PAGE_SIZE);
+
+    /* Send the page update requests down to the hypervisor. */
+    if ( finish_mmu_updates(xc_handle, mmu) )
+        goto error_out;
+
+    free(mmu);
+    free(page_array);
+
+    /*
+     * Initial register values:
+     */
+    ctxt->cpu_ctxt.ds = 0x68;
+    ctxt->cpu_ctxt.es = 0x0;
+    ctxt->cpu_ctxt.fs = 0x0;
+    ctxt->cpu_ctxt.gs = 0x0;
+    ctxt->cpu_ctxt.ss = 0x68;
+    ctxt->cpu_ctxt.cs = 0x60;
+    ctxt->cpu_ctxt.eip = dsi.v_kernentry;
+    ctxt->cpu_ctxt.edx = vboot_gdt_start;
+    ctxt->cpu_ctxt.eax = 0x800;
+    ctxt->cpu_ctxt.esp = vboot_gdt_end;
+    ctxt->cpu_ctxt.ebx = 0;    /* startup_32 expects this to be 0 to signal boot cpu */
+    ctxt->cpu_ctxt.ecx = mem_mapp->nr_map;
+    ctxt->cpu_ctxt.esi = vboot_params_start;
+    ctxt->cpu_ctxt.edi = vboot_params_start + 0x2d0; /* 0x2d0 is the offset annotated for e820_map in linux_boot_params */
+
+    ctxt->cpu_ctxt.eflags = (1<<2);
+
+    return 0;
+
+ error_out: /* common failure exit: frees mmu and page_array if they were allocated */
+    if ( mmu != NULL )
+        free(mmu);
+    if ( page_array != NULL )
+        free(page_array);
+    return -1;
+}
+
+/* Length of the file's payload: the trailing 32-bit size field for gzip files,
+ * otherwise the on-disk length. The descriptor is left rewound to offset 0. */
+static unsigned long get_filesz(int fd)
+{
+    u16 sig = 0;   /* stays 0 (i.e. "not gzip") if the header read is short */
+    u32 _sz = 0;
+    unsigned long sz;
+
+    lseek(fd, 0, SEEK_SET);
+    if ( read(fd, &sig, sizeof(sig)) != (ssize_t)sizeof(sig) )
+        sig = 0;
+    sz = lseek(fd, 0, SEEK_END);
+    /* Trust the gzip trailer only if it can be located and read in full. */
+    if ( (sig == 0x8b1f) && (lseek(fd, -4, SEEK_END) >= 0) &&
+         (read(fd, &_sz, sizeof(_sz)) == (ssize_t)sizeof(_sz)) )
+        sz = _sz;
+    lseek(fd, 0, SEEK_SET);
+    return sz;
+}
+
+/* Read the kernel image named by filename (through zlib) into a malloc'd
+ * buffer; *size receives get_filesz()'s value. Returns NULL on any failure. */
+static char *read_kernel_image(const char *filename, unsigned long *size)
+{
+    int fd;
+    gzFile gz;
+    char *buf;
+    unsigned int nread;
+
+    fd = open(filename, O_RDONLY);
+    if ( fd < 0 )
+    {
+        PERROR("Could not open kernel image");
+        return NULL;
+    }
+
+    *size = get_filesz(fd);
+
+    gz = gzdopen(fd, "rb");
+    if ( gz == NULL )
+    {
+        PERROR("Could not allocate decompression state for state file");
+        close(fd);
+        return NULL;
+    }
+
+    buf = malloc(*size);
+    if ( buf == NULL )
+    {
+        PERROR("Could not allocate memory for kernel image");
+    }
+    else if ( (nread = gzread(gz, buf, *size)) != *size )
+    {
+        PERROR("Error reading kernel image, could not"
+               " read the whole image (%d != %ld).", nread, *size);
+        free(buf);
+        buf = NULL;
+    }
+
+    gzclose(gz); /* gz was opened on fd, so fd is not closed separately here */
+    return buf;
+}
+
+#define VMX_FEATURE_FLAG 0x20
+
+/* Run CPUID with EAX=1 and test VMX_FEATURE_FLAG in the returned ECX.
+ * Returns 0 when the bit is set, -1 when it is clear. */
+int vmx_identify(void)
+{
+    int leaf_eax, features_ecx;
+
+    __asm__ __volatile__ ("cpuid" 
+                         : "=a" (leaf_eax), "=c" (features_ecx) 
+                         : "0" (1) 
+                         : "bx", "dx");
+
+    return (features_ecx & VMX_FEATURE_FLAG) ? 0 : -1;
+}
+
+/*
+ * Build a VMX guest inside an existing, paused, not-yet-constructed domain.
+ *
+ * The kernel image is read with read_kernel_image(), the optional ramdisk
+ * is opened through gzdopen(), and both are handed to setup_guestos().
+ * The initial execution context is then filled in and passed to Xen with
+ * DOM0_BUILDDOMAIN.
+ *
+ * Returns the result of the DOM0_BUILDDOMAIN operation once the guest has
+ * been set up, or -1 on any earlier failure.  Every file descriptor, gzip
+ * stream and buffer acquired here is released before returning.
+ */
+int xc_vmx_build(int xc_handle,
+                   u32 domid,
+                   const char *image_name,
+                   struct mem_map *mem_mapp,
+                   const char *ramdisk_name,
+                   const char *cmdline,
+                   unsigned int control_evtchn,
+                   unsigned long flags)
+{
+    dom0_op_t launch_op, op;
+    int initrd_fd = -1;
+    gzFile initrd_gfd = NULL;
+    int rc, i;
+    full_execution_context_t st_ctxt, *ctxt = &st_ctxt;
+    long tot_pages;
+    unsigned long nr_pages;
+    char         *image = NULL;
+    unsigned long image_size, initrd_size=0;
+
+    /* The CPUID probe does not set errno, so report with ERROR, not PERROR. */
+    if ( vmx_identify() < 0 )
+    {
+        ERROR("CPU doesn't support VMX Extensions");
+        goto error_out;
+    }
+
+    /*
+     * Keep the result in a signed variable: nr_pages is unsigned, so
+     * testing it for "< 0" directly could never detect an error return.
+     */
+    if ( (tot_pages = get_tot_pages(xc_handle, domid)) < 0 )
+    {
+        PERROR("Could not find total pages for domain");
+        goto error_out;
+    }
+    nr_pages = (unsigned long)tot_pages;
+
+    if ( (image = read_kernel_image(image_name, &image_size)) == NULL )
+        goto error_out;
+
+    if ( (ramdisk_name != NULL) && (strlen(ramdisk_name) != 0) )
+    {
+        if ( (initrd_fd = open(ramdisk_name, O_RDONLY)) < 0 )
+        {
+            PERROR("Could not open the initial ramdisk image");
+            goto error_out;
+        }
+
+        initrd_size = get_filesz(initrd_fd);
+
+        if ( (initrd_gfd = gzdopen(initrd_fd, "rb")) == NULL )
+        {
+            PERROR("Could not allocate decompression state for initrd");
+            goto error_out;
+        }
+    }
+
+    /* Go through error_out so the image and ramdisk handles are released. */
+    if ( mlock(&st_ctxt, sizeof(st_ctxt) ) )
+    {
+        PERROR("Unable to mlock ctxt");
+        goto error_out;
+    }
+
+    op.cmd = DOM0_GETDOMAININFO;
+    op.u.getdomaininfo.domain = (domid_t)domid;
+    op.u.getdomaininfo.ctxt = ctxt;
+    if ( (do_dom0_op(xc_handle, &op) < 0) ||
+         ((u16)op.u.getdomaininfo.domain != domid) )
+    {
+        PERROR("Could not get info on domain");
+        goto error_out;
+    }
+    /* Only a paused domain with no page-table base yet may be built. */
+    if ( !(op.u.getdomaininfo.flags & DOMFLAGS_PAUSED) ||
+         (ctxt->pt_base != 0) )
+    {
+        ERROR("Domain is already constructed");
+        goto error_out;
+    }
+
+    if ( setup_guestos(xc_handle, domid, image, image_size,
+                       initrd_gfd, initrd_size, nr_pages,
+                       ctxt, cmdline,
+                       op.u.getdomaininfo.shared_info_frame,
+                       control_evtchn, flags, mem_mapp) < 0 )
+    {
+        ERROR("Error constructing guest OS");
+        goto error_out;
+    }
+
+    /*
+     * gzclose() also closes the descriptor that was handed to gzdopen(),
+     * so the raw descriptor is closed directly only when no gzip stream
+     * was created.  Closing both would close the same descriptor twice.
+     */
+    if ( initrd_gfd != NULL )
+        gzclose(initrd_gfd);
+    else if ( initrd_fd >= 0 )
+        close(initrd_fd);
+    if ( image != NULL )
+        free(image);
+
+    ctxt->flags = ECF_VMX_GUEST;
+    /* FPU is set up to default initial state. */
+    memset(ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));
+
+    /* Virtual IDT is empty at start-of-day. */
+    for ( i = 0; i < 256; i++ )
+    {
+        ctxt->trap_ctxt[i].vector = i;
+        ctxt->trap_ctxt[i].cs     = FLAT_GUESTOS_CS;
+    }
+    ctxt->fast_trap_idx = 0;
+
+    /* No LDT. */
+    ctxt->ldt_ents = 0;
+
+    /* Use the default Xen-provided GDT. */
+    ctxt->gdt_ents = 0;
+
+    /* Ring 1 stack is the initial stack. */
+/*
+    ctxt->guestos_ss  = FLAT_GUESTOS_DS;
+    ctxt->guestos_esp = vstartinfo_start;
+*/
+    /* No debugging. */
+    memset(ctxt->debugreg, 0, sizeof(ctxt->debugreg));
+
+    /* No callback handlers. */
+    ctxt->event_callback_cs     = FLAT_GUESTOS_CS;
+    ctxt->event_callback_eip    = 0;
+    ctxt->failsafe_callback_cs  = FLAT_GUESTOS_CS;
+    ctxt->failsafe_callback_eip = 0;
+
+    memset( &launch_op, 0, sizeof(launch_op) );
+
+    launch_op.u.builddomain.domain   = (domid_t)domid;
+    launch_op.u.builddomain.ctxt = ctxt;
+
+    launch_op.cmd = DOM0_BUILDDOMAIN;
+    rc = do_dom0_op(xc_handle, &launch_op);
+    return rc;
+
+ error_out:
+    if ( initrd_gfd != NULL )
+        gzclose(initrd_gfd);
+    else if ( initrd_fd >= 0 )
+        close(initrd_fd);
+    if ( image != NULL )
+        free(image);
+
+    return -1;
+}
+
+/*
+ * Return 1 if the program header is a PT_LOAD segment that is writable
+ * and/or executable, 0 otherwise.
+ */
+static inline int is_loadable_phdr(Elf_Phdr *phdr)
+{
+    if ( phdr->p_type != PT_LOAD )
+        return 0;
+    return (phdr->p_flags & (PF_W|PF_X)) != 0;
+}
+
+/*
+ * Validate an in-memory ELF kernel image and derive the guest layout.
+ *
+ * Checks the ELF magic, that the program- and section-header tables lie
+ * within the image, and that a section-header string table is declared.
+ * The lowest and highest addresses covered by loadable segments are then
+ * rebased by LINUX_PAGE_OFFSET and stored in *dsi.
+ *
+ * Returns 0 on success or -EINVAL if the image is rejected.
+ */
+static int parseelfimage(char *elfbase,
+                         unsigned long elfsize,
+                         struct domain_setup_info *dsi)
+{
+    Elf_Ehdr *ehdr = (Elf_Ehdr *)elfbase;
+    Elf_Phdr *phdr;
+    unsigned long kernstart = ~0UL, kernend=0UL;
+    int h;
+
+    if ( !IS_ELF(*ehdr) )
+    {
+        ERROR("Kernel image does not have an ELF header.");
+        return -EINVAL;
+    }
+
+    if ( (ehdr->e_phoff + (ehdr->e_phnum * ehdr->e_phentsize)) > elfsize )
+    {
+        ERROR("ELF program headers extend beyond end of image.");
+        return -EINVAL;
+    }
+
+    if ( (ehdr->e_shoff + (ehdr->e_shnum * ehdr->e_shentsize)) > elfsize )
+    {
+        ERROR("ELF section headers extend beyond end of image.");
+        return -EINVAL;
+    }
+
+    /*
+     * An image without a section-header strings table is still rejected.
+     * The table itself is not read: nothing here uses its contents, and
+     * locating it would dereference a section header at the unvalidated
+     * index e_shstrndx.
+     */
+    if ( ehdr->e_shstrndx == SHN_UNDEF )
+    {
+        ERROR("ELF image has no section-header strings table (shstrtab).");
+        return -EINVAL;
+    }
+
+    /* Find the address range spanned by all loadable segments. */
+    for ( h = 0; h < ehdr->e_phnum; h++ )
+    {
+        phdr = (Elf_Phdr *)(elfbase + ehdr->e_phoff + (h*ehdr->e_phentsize));
+        if ( !is_loadable_phdr(phdr) )
+            continue;
+        if ( phdr->p_vaddr < kernstart )
+            kernstart = phdr->p_vaddr;
+        if ( (phdr->p_vaddr + phdr->p_memsz) > kernend )
+            kernend = phdr->p_vaddr + phdr->p_memsz;
+    }
+
+    /* kernstart > kernend also catches "no loadable segment found". */
+    if ( (kernstart > kernend) ||
+         (ehdr->e_entry < kernstart) ||
+         (ehdr->e_entry > kernend) )
+    {
+        ERROR("Malformed ELF image.");
+        return -EINVAL;
+    }
+
+    dsi->v_start = 0x00000000;
+    dsi->use_writable_pagetables = 0;
+    dsi->load_bsd_symtab = 0;
+
+    dsi->v_kernstart = kernstart - LINUX_PAGE_OFFSET;
+    dsi->v_kernend   = kernend - LINUX_PAGE_OFFSET;
+    /* The entry point is a fixed address, not the ELF header's e_entry. */
+    dsi->v_kernentry = LINUX_KERNEL_ENTR_ADDR;
+
+    dsi->v_end       = dsi->v_kernend;
+
+    return 0;
+}
+
+/*
+ * Copy every loadable ELF segment into the guest's memory.
+ *
+ * For each loadable program header the first p_filesz bytes are copied
+ * from the image and the remainder up to p_memsz is zero-filled.  Guest
+ * pages are mapped one at a time through xc_map_foreign_range(), using
+ * parray[] to translate the page index into a frame number.
+ *
+ * Returns 0 on success, or -1 if a guest page could not be mapped.
+ */
+static int
+loadelfimage(
+    char *elfbase, int xch, u32 dom, unsigned long *parray,
+    unsigned long vstart)
+{
+    Elf_Ehdr *ehdr = (Elf_Ehdr *)elfbase;
+    Elf_Phdr *phdr;
+    int h;
+
+    char         *va;
+    unsigned long pa, done, chunksz;
+
+    for ( h = 0; h < ehdr->e_phnum; h++ )
+    {
+        phdr = (Elf_Phdr *)(elfbase + ehdr->e_phoff + (h*ehdr->e_phentsize));
+        if ( !is_loadable_phdr(phdr) )
+            continue;
+
+        /* File-backed part of the segment. */
+        for ( done = 0; done < phdr->p_filesz; done += chunksz )
+        {
+            pa = (phdr->p_vaddr + done) - vstart - LINUX_PAGE_OFFSET;
+            va = xc_map_foreign_range(
+                xch, dom, PAGE_SIZE, PROT_WRITE, parray[pa>>PAGE_SHIFT]);
+            /* Presumably NULL signals a failed mapping; do not write
+             * through it. */
+            if ( va == NULL )
+                return -1;
+            /* Never copy past the end of the mapped page. */
+            chunksz = phdr->p_filesz - done;
+            if ( chunksz > (PAGE_SIZE - (pa & (PAGE_SIZE-1))) )
+                chunksz = PAGE_SIZE - (pa & (PAGE_SIZE-1));
+            memcpy(va + (pa & (PAGE_SIZE-1)),
+                   elfbase + phdr->p_offset + done, chunksz);
+            munmap(va, PAGE_SIZE);
+        }
+
+        /* Zero-filled tail of the segment (p_filesz .. p_memsz). */
+        for ( ; done < phdr->p_memsz; done += chunksz )
+        {
+            pa = (phdr->p_vaddr + done) - vstart - LINUX_PAGE_OFFSET;
+            va = xc_map_foreign_range(
+                xch, dom, PAGE_SIZE, PROT_WRITE, parray[pa>>PAGE_SHIFT]);
+            if ( va == NULL )
+                return -1;
+            chunksz = phdr->p_memsz - done;
+            if ( chunksz > (PAGE_SIZE - (pa & (PAGE_SIZE-1))) )
+                chunksz = PAGE_SIZE - (pa & (PAGE_SIZE-1));
+            memset(va + (pa & (PAGE_SIZE-1)), 0, chunksz);
+            munmap(va, PAGE_SIZE);
+        }
+    }
+
+    return 0;
+}
+
+/*
+ * Copy 'size' bytes from 'src' to guest address 'dst', one guest page at
+ * a time.  'dst' is rebased by 'vstart' and parray[] supplies the frame
+ * for each page index.
+ */
+static void
+map_memcpy(
+    unsigned long dst, char *src, unsigned long size,
+    int xch, u32 dom, unsigned long *parray, unsigned long vstart)
+{
+    unsigned long copied = 0;
+
+    while ( copied < size )
+    {
+        unsigned long pa     = dst + copied - vstart;
+        unsigned long pg_off = pa & (PAGE_SIZE-1);
+        unsigned long len    = size - copied;
+        char *page = xc_map_foreign_range(
+            xch, dom, PAGE_SIZE, PROT_WRITE, parray[pa>>PAGE_SHIFT]);
+
+        /* Clamp the chunk so it ends at the page boundary. */
+        if ( len > (PAGE_SIZE - pg_off) )
+            len = PAGE_SIZE - pg_off;
+
+        memcpy(page + pg_off, src + copied, len);
+        munmap(page, PAGE_SIZE);
+        copied += len;
+    }
+}
+
+/* Alignment, in bytes, applied to everything placed in the symbol area. */
+#define ELFROUND (ELFSIZE / 8)
+
+/*
+ * Lay out (and, when parray is non-NULL, copy into the guest) a symbol
+ * table area placed after the kernel image.
+ *
+ * The area starts at dsi->v_kernend rounded up to ELFROUND and holds, in
+ * order: an int with the total length, a crafted ELF header, the section
+ * header table, and the contents of the SHT_SYMTAB/SHT_STRTAB sections.
+ * On return dsi->symtab_addr, dsi->symtab_len and dsi->v_end describe the
+ * area; if no section was selected, dsi->symtab_addr is set to 0 and
+ * dsi->v_end is left untouched.
+ *
+ * Always returns 0, including when the scratch buffer cannot be allocated.
+ */
+static int
+loadelfsymtab(
+    char *elfbase, int xch, u32 dom, unsigned long *parray,
+    struct domain_setup_info *dsi)
+{
+    Elf_Ehdr *ehdr = (Elf_Ehdr *)elfbase, *sym_ehdr;
+    Elf_Shdr *shdr;
+    unsigned long maxva, symva;
+    char *p;
+    int h, i;
+
+    /* Scratch buffer: length word + ELF header + section header table. */
+    p = malloc(sizeof(int) + sizeof(Elf_Ehdr) +
+               ehdr->e_shnum * sizeof(Elf_Shdr));
+    if (p == NULL)
+        return 0;
+
+    /* symva: start of the area; symtab_addr: just past the length word. */
+    maxva = (dsi->v_kernend + ELFROUND - 1) & ~(ELFROUND - 1);
+    symva = maxva;
+    maxva += sizeof(int);
+    dsi->symtab_addr = maxva;
+    dsi->symtab_len = 0;
+    maxva += sizeof(Elf_Ehdr) + ehdr->e_shnum * sizeof(Elf_Shdr);
+    maxva = (maxva + ELFROUND - 1) & ~(ELFROUND - 1);
+
+    /* Work on a private copy of the section headers. */
+    /* NOTE(review): the copy assumes e_shentsize == sizeof(Elf_Shdr). */
+    shdr = (Elf_Shdr *)(p + sizeof(int) + sizeof(Elf_Ehdr));
+    memcpy(shdr, elfbase + ehdr->e_shoff, ehdr->e_shnum * sizeof(Elf_Shdr));
+
+    for ( h = 0; h < ehdr->e_shnum; h++ ) 
+    {
+        if ( shdr[h].sh_type == SHT_STRTAB )
+        {
+            /* Look for a symtab @i linked to strtab @h. */
+            for ( i = 0; i < ehdr->e_shnum; i++ )
+                if ( (shdr[i].sh_type == SHT_SYMTAB) &&
+                     (shdr[i].sh_link == h) )
+                    break;
+            /* Skip strtab @h if no symtab @i links to it. */
+            if ( i == ehdr->e_shnum )
+            {
+                shdr[h].sh_offset = 0;
+                continue;
+            }
+        }
+
+        if ( (shdr[h].sh_type == SHT_STRTAB) ||
+             (shdr[h].sh_type == SHT_SYMTAB) )
+        {
+            /* With parray == NULL only the layout is computed. */
+            if ( parray != NULL )
+                map_memcpy(maxva, elfbase + shdr[h].sh_offset, shdr[h].sh_size,
+                           xch, dom, parray, dsi->v_start);
+
+            /* Mangled to be based on ELF header location. */
+            shdr[h].sh_offset = maxva - dsi->symtab_addr;
+
+            dsi->symtab_len += shdr[h].sh_size;
+            maxva += shdr[h].sh_size;
+            maxva = (maxva + ELFROUND - 1) & ~(ELFROUND - 1);
+        }
+
+        shdr[h].sh_name = 0;  /* Name is NULL. */
+    }
+
+    /* Nothing selected: report "no symbol table" and leave v_end alone. */
+    if ( dsi->symtab_len == 0 )
+    {
+        dsi->symtab_addr = 0;
+        goto out;
+    }
+
+    if ( parray != NULL )
+    {
+        /* Length word covers everything from symtab_addr to the end. */
+        *(int *)p = maxva - dsi->symtab_addr;
+        /* Crafted header: no program headers, section table right after. */
+        sym_ehdr = (Elf_Ehdr *)(p + sizeof(int));
+        memcpy(sym_ehdr, ehdr, sizeof(Elf_Ehdr));
+        sym_ehdr->e_phoff = 0;
+        sym_ehdr->e_shoff = sizeof(Elf_Ehdr);
+        sym_ehdr->e_phentsize = 0;
+        sym_ehdr->e_phnum = 0;
+        sym_ehdr->e_shstrndx = SHN_UNDEF;
+
+        /* Copy total length, crafted ELF header and section header table */
+        map_memcpy(symva, p, sizeof(int) + sizeof(Elf_Ehdr) +
+                   ehdr->e_shnum * sizeof(Elf_Shdr), xch, dom, parray,
+                   dsi->v_start);
+    }
+
+    dsi->symtab_len = maxva - dsi->symtab_addr;
+    dsi->v_end = round_pgup(maxva);
+
+ out:
+    if ( p != NULL )
+        free(p);
+
+    return 0;
+}
index d2b7da0ebab8f2c09bb50abb3ebdb89aa98a9d84..b4ad35b9c04c0a87f9726e5f86aac461f0f3fb1b 100644 (file)
@@ -16,6 +16,7 @@
 #include <arpa/inet.h>
 #include "xc_private.h"
 #include "gzip_stream.h"
+#include "linux_boot_params.h"
 
 /* Needed for Python versions earlier than 2.3. */
 #ifndef PyMODINIT_FUNC
@@ -393,6 +394,87 @@ static PyObject *pyxc_plan9_build(PyObject *self,
     return zero;
 }
 
+/*
+ * Python binding for xc_vmx_build().
+ *
+ * Keyword arguments: dom, control_evtchn, image, memmap and, optionally,
+ * ramdisk, cmdline, flags.  'memmap' must be a list whose first element
+ * is a header (skipped) and whose remaining elements are 4-item lists
+ * [start, end, type, caching_attr]: start and end are hexadecimal
+ * strings, type and caching_attr are integers.
+ *
+ * Returns the module's 'zero' object on success.  Returns NULL with a
+ * Python exception set on malformed arguments (ValueError/TypeError) or
+ * when xc_vmx_build() fails (xc_error built from errno).
+ */
+static PyObject *pyxc_vmx_build(PyObject *self,
+                                  PyObject *args,
+                                  PyObject *kwds)
+{
+    XcObject *xc = (XcObject *)self;
+
+    u32   dom;
+    char *image, *ramdisk = NULL, *cmdline = "";
+    PyObject *memmap;
+    int   control_evtchn, flags = 0;
+    int numItems, maxItems, i;
+    struct mem_map mem_map;
+
+    static char *kwd_list[] = { "dom", "control_evtchn", 
+                                "image", "memmap",
+                                "ramdisk", "cmdline", "flags",
+                                NULL };
+
+    if ( !PyArg_ParseTupleAndKeywords(args, kwds, "iisO!|ssi", kwd_list, 
+                                      &dom, &control_evtchn, 
+                                      &image, &PyList_Type, &memmap,
+                                      &ramdisk, &cmdline, &flags) )
+        return NULL;
+
+    memset(&mem_map, 0, sizeof(mem_map));
+
+    /* The first list element is the "memmap" header, not a range. */
+    numItems = PyList_Size(memmap) - 1;
+    if ( numItems < 0 )
+    {
+        PyErr_SetString(PyExc_ValueError, "memmap list is empty");
+        return NULL;
+    }
+
+    /* Refuse more ranges than the fixed-size table can hold.
+     * NOTE(review): assumes mem_map.map is an array member - confirm
+     * against linux_boot_params.h. */
+    maxItems = (int)(sizeof(mem_map.map) / sizeof(mem_map.map[0]));
+    if ( numItems > maxItems )
+    {
+        PyErr_Format(PyExc_ValueError,
+                     "memmap has %d entries; at most %d are supported",
+                     numItems, maxItems);
+        return NULL;
+    }
+    mem_map.nr_map = numItems;
+
+    /* Iterate over the ranges, skipping the "memmap" header at index 0. */
+    for ( i = 1; i <= numItems; i++ )
+    {
+        PyObject *item;
+        unsigned long lf1, lf2, lf3, lf4;
+        char *sf1, *sf2;
+
+        item = PyList_GetItem(memmap, i);   /* borrowed reference */
+        if ( !PyList_Check(item) || (PyList_Size(item) != 4) )
+        {
+            PyErr_Format(PyExc_ValueError,
+                         "memmap entry %d is not a list of 4 fields", i);
+            return NULL;
+        }
+
+        /* On a type mismatch these leave a Python exception set. */
+        if ( (sf1 = PyString_AsString(PyList_GetItem(item, 0))) == NULL )
+            return NULL;
+        if ( (sf2 = PyString_AsString(PyList_GetItem(item, 1))) == NULL )
+            return NULL;
+        lf3 = PyLong_AsLong(PyList_GetItem(item, 2));
+        if ( PyErr_Occurred() )
+            return NULL;
+        lf4 = PyLong_AsLong(PyList_GetItem(item, 3));
+        if ( PyErr_Occurred() )
+            return NULL;
+
+        /* Both bounds must parse as hex and form a non-negative range. */
+        if ( (sscanf(sf1, "%lx", &lf1) != 1) ||
+             (sscanf(sf2, "%lx", &lf2) != 1) ||
+             (lf2 < lf1) )
+        {
+            PyErr_Format(PyExc_ValueError,
+                         "memmap entry %d has an invalid address range", i);
+            return NULL;
+        }
+
+        mem_map.map[i-1].addr = lf1;
+        mem_map.map[i-1].size = lf2 - lf1;
+        mem_map.map[i-1].type = lf3;
+        mem_map.map[i-1].caching_attr = lf4;
+    }
+
+    if ( xc_vmx_build(xc->xc_handle, dom, image, &mem_map,
+                        ramdisk, cmdline, control_evtchn, flags) != 0 )
+        return PyErr_SetFromErrno(xc_error);
+    
+    Py_INCREF(zero);
+    return zero;
+}
+
 static PyObject *pyxc_bvtsched_global_set(PyObject *self,
                                           PyObject *args,
                                           PyObject *kwds)
@@ -943,6 +1025,17 @@ static PyMethodDef pyxc_methods[] = {
       " cmdline [str, n/a]: Kernel parameters, if any.\n\n"
       "Returns: [int] 0 on success; -1 on error.\n" },
 
+    { "vmx_build", 
+      (PyCFunction)pyxc_vmx_build, 
+      METH_VARARGS | METH_KEYWORDS, "\n"
+      "Build a new VMX guest OS.\n"
+      " dom     [int]:      Identifier of domain to build into.\n"
+      " image   [str]:      Name of kernel image file. May be gzipped.\n"
+      " memmap  [list]:     Memory map.\n"
+      " ramdisk [str, n/a]: Name of ramdisk file, if any.\n"
+      " cmdline [str, n/a]: Kernel parameters, if any.\n\n"
+      "Returns: [int] 0 on success; -1 on error.\n" },
+
     { "bvtsched_global_set",
       (PyCFunction)pyxc_bvtsched_global_set,
       METH_VARARGS | METH_KEYWORDS, "\n"
diff --git a/tools/python/xen/util/memmap.py b/tools/python/xen/util/memmap.py
new file mode 100644 (file)
index 0000000..2899a87
--- /dev/null
@@ -0,0 +1,41 @@
+# Caching-attribute mnemonic -> numeric code.
+# NOTE(review): values look like x86 memory-type encodings - confirm.
+mem_caching_attr = {
+    'UC': 0,
+    'WC': 1,
+    'WT': 4,
+    'WP': 5,
+    'WB': 6,
+}
+
+# Address-range type name -> numeric code.
+e820_mem_type = {
+    'AddressRangeMemory': 1,
+    'AddressRangeReserved': 2,
+    'AddressRangeACPI': 3,
+    'AddressRangeNVS': 4,
+    'AddressRangeIO': 16,
+    'AddressRangeShared': 17,
+}
+
+# Column indexes of the memory-type and caching-attribute fields in a row.
+MT_COL = 2
+MA_COL = 3
+
+def strmap(row):
+    """Translate the symbolic fields of one memory-map row to numbers.
+
+    Anything that is not a list (such as the leading 'memmap' tag) is
+    returned unchanged.  For a list, a new list is returned in which the
+    memory-type column has been looked up in e820_mem_type and the
+    caching-attribute column in mem_caching_attr; the caller's row is
+    left untouched, so the same input can be parsed more than once.
+
+    Raises KeyError for an unknown type or attribute name.
+    """
+    if not isinstance(row, list):
+        return row
+    converted = list(row)
+    converted[MT_COL] = e820_mem_type[row[MT_COL]]
+    converted[MA_COL] = mem_caching_attr[row[MA_COL]]
+    return converted
+
+def memmap_parse(memmap):
+    """Return a list with strmap() applied to every element of memmap."""
+    return [strmap(entry) for entry in memmap]
+
+if __name__ == '__main__':
+    # Self-test: parse a sample map with one row per memory type.
+    sample = ['memmap']
+    for mem_type, caching in [('AddressRangeMemory', 'UC'),
+                              ('AddressRangeReserved', 'UC'),
+                              ('AddressRangeACPI', 'WB'),
+                              ('AddressRangeNVS', 'WB'),
+                              ('AddressRangeIO', 'WB'),
+                              ('AddressRangeShared', 'WB')]:
+        sample.append(['1', '2', mem_type, caching])
+    print memmap_parse(sample)
+
+
index 1a229a5599813fad3927a987daa3db8dc1e08df7..c92bdf08bcac5a46823de3ed2b4a6b2685050043 100644 (file)
@@ -20,6 +20,7 @@ from twisted.internet import defer
 import xen.lowlevel.xc; xc = xen.lowlevel.xc.new()
 import xen.util.ip
 from xen.util.ip import _readline, _readlines
+from xen.xend.server import channel
 
 import sxp
 
@@ -319,6 +320,7 @@ class XendDomainInfo:
         self.restart_time = None
         self.console_port = None
         self.savedinfo = None
+        self.is_vmx = 0
 
     def setdom(self, dom):
         """Set the domain id.
@@ -720,7 +722,7 @@ class XendDomainInfo:
         log.debug('init_domain> Created domain=%d name=%s memory=%d', dom, name, memory)
         self.setdom(dom)
 
-    def build_domain(self, ostype, kernel, ramdisk, cmdline):
+    def build_domain(self, ostype, kernel, ramdisk, cmdline, memmap):
         """Build the domain boot image.
         """
         if self.recreate or self.restore: return
@@ -735,17 +737,26 @@ class XendDomainInfo:
         flags = 0
         if self.netif_backend: flags |= SIF_NET_BE_DOMAIN
         if self.blkif_backend: flags |= SIF_BLK_BE_DOMAIN
-        err = buildfn(dom            = dom,
-                      image          = kernel,
-                      control_evtchn = self.console.getRemotePort(),
-                      cmdline        = cmdline,
-                      ramdisk        = ramdisk,
-                      flags          = flags)
+        # Arguments shared by every builder.
+        build_args = {'dom'     : dom,
+                      'image'   : kernel,
+                      'cmdline' : cmdline,
+                      'ramdisk' : ramdisk,
+                      'flags'   : flags}
+        if ostype == "vmx":
+            # The VMX builder is given no console event channel (0) and
+            # additionally takes the parsed memory map.
+            build_args['control_evtchn'] = 0
+            build_args['memmap'] = memmap
+        else:
+            build_args['control_evtchn'] = self.console.getRemotePort()
+        err = buildfn(**build_args)
         if err != 0:
             raise VmError('Building domain failed: type=%s dom=%d err=%d'
                           % (ostype, dom, err))
 
-    def create_domain(self, ostype, kernel, ramdisk, cmdline):
+    def create_domain(self, ostype, kernel, ramdisk, cmdline, memmap=''):
         """Create a domain. Builds the image but does not configure it.
 
         @param ostype:  OS type
@@ -760,7 +771,7 @@ class XendDomainInfo:
         else:
             self.console = xendConsole.console_create(
                 self.dom, console_port=self.console_port)
-        self.build_domain(ostype, kernel, ramdisk, cmdline)
+        self.build_domain(ostype, kernel, ramdisk, cmdline, memmap)
         self.image = kernel
         self.ramdisk = ramdisk
         self.cmdline = cmdline
@@ -804,6 +815,18 @@ class XendDomainInfo:
             index[dev_name] = dev_index + 1
         deferred = defer.DeferredList(dlist, fireOnOneErrback=1)
         deferred.addErrback(dlist_err)
+        if self.is_vmx:
+            # VMX domains additionally get an external device-model
+            # process, configured from the domain config.
+            device_model = sxp.child_value(self.config, 'device_model')
+            device_config = sxp.child_value(self.config, 'device_config')
+            memory = sxp.child_value(self.config, "memory")
+            # Create an event channel (presumably between domain 0 and
+            # this domain - confirm against channel.eventChannel).
+            device_channel = channel.eventChannel(0, self.dom)
+            # Run "<device_model> -f <device_config> -d <dom> -p <port1>
+            # -m <memory>" in the background through the shell.
+            # NOTE(review): the command line is assembled by string
+            # concatenation from config values and handed to os.system();
+            # a missing device_model raises TypeError here, and shell
+            # metacharacters in any value are interpreted by the shell.
+            # Confirm these values are trusted or use an argv-based spawn.
+            os.system(device_model
+                      + " -f %s" % device_config
+                      + " -d %d" % self.dom
+                      + " -p %d" % device_channel['port1']
+                      + " -m %s &" % memory)
         return deferred
 
     def device_create(self, dev_config):
@@ -1091,7 +1114,33 @@ def vm_image_plan9(vm, image):
     vm.create_domain("plan9", kernel, ramdisk, cmdline)
     return vm
     
-    
+def vm_image_vmx(vm, image):
+    """Create a VM for the VMX environment.
+
+    The kernel command line is assembled from the image's 'ip' (default
+    'dhcp'), 'root' and 'args' settings.  The memory map is read from the
+    SXP file named by the 'memmap' entry of the vm config.
+
+    @param vm:    vm
+    @param image: image config
+    @return: vm
+    @raise VmError: if the vm config names no memory map file
+    """
+    kernel = sxp.child_value(image, "kernel")
+    cmdline = ""
+    ip = sxp.child_value(image, "ip", "dhcp")
+    if ip:
+        cmdline += " ip=" + ip
+    root = sxp.child_value(image, "root")
+    if root:
+        cmdline += " root=" + root
+    args = sxp.child_value(image, "args")
+    if args:
+        cmdline += " " + args
+    ramdisk = sxp.child_value(image, "ramdisk", '')
+    memmap_file = sxp.child_value(vm.config, "memmap", '')
+    if not memmap_file:
+        # Fail with a clear message instead of trying to open ''.
+        raise VmError('vmx: missing memmap file')
+    from xen.util.memmap import memmap_parse
+    # Close the file even if parsing fails.
+    memmap_fd = open(memmap_file)
+    try:
+        memmap = memmap_parse(sxp.parse(memmap_fd)[0])
+    finally:
+        memmap_fd.close()
+    vm.create_domain("vmx", kernel, ramdisk, cmdline, memmap)
+    vm.is_vmx = 1
+    return vm
 
 def vm_dev_vif(vm, val, index, change=0):
     """Create a virtual network interface (vif).
@@ -1215,6 +1264,7 @@ def vm_field_maxmem(vm, config, val, index):
 # Register image handlers.
 add_image_handler('linux',  vm_image_linux)
 add_image_handler('plan9',  vm_image_plan9)
+add_image_handler('vmx',  vm_image_vmx)
 
 # Register device handlers.
 add_device_handler('vif',  vm_dev_vif)
index 9d977dd337104d5af77b5b1c60d59760911de861..633888a6435b694a4b6ad8bf1f2226a73662a537 100644 (file)
@@ -210,6 +210,18 @@ gopts.var('nfs_root', val="PATH",
           fn=set_value, default=None,
           use="Set the path of the root NFS directory.")
 
+# Options used by VMX guests: memory map file and device model settings.
+gopts.var('memmap', val='FILE',
+          fn=set_value, default='',
+          use="Path to memmap SXP file.")
+
+gopts.var('device_model', val='FILE',
+          fn=set_value, default='',
+          use="Path to device model program.")
+
+gopts.var('device_config', val='FILE',
+          fn=set_value, default='',
+          use="Path to device model configuration.")
+
 def strip(pre, s):
     """Strip prefix 'pre' if present.
     """
@@ -309,6 +321,15 @@ def configure_vfr(config, vals):
          config_vfr.append(['vif', ['id', idx], ['ip', ip]])
      config.append(config_vfr)
 
+def configure_vmx(config_devs, vals):
+    """Append the VMX settings to the device config.
+
+    Adds one [name, value] entry each for 'memmap', 'device_model' and
+    'device_config', with the values read from vals.
+    """
+    settings = [[name, getattr(vals, name)]
+                for name in ('memmap', 'device_model', 'device_config')]
+    for setting in settings:
+        config_devs.append(setting)
 
 def make_config(vals):
     """Create the domain configuration.
@@ -337,6 +358,7 @@ def make_config(vals):
     configure_disks(config_devs, vals)
     configure_pci(config_devs, vals)
     configure_vifs(config_devs, vals)
+    configure_vmx(config_devs, vals)
     config += config_devs
     return config